from lxml import etree

# Load the local file '1.html' with lxml's HTML parser instead of the
# default XML parser that etree.parse() would otherwise use.
tree = etree.parse(
    '1.html',
    parser=etree.HTMLParser(),
)
# print(type(tree))
# lis = tree.findall('body/div/ol/li')
# print( lis)
# for li in lis:
#     a = li.find('a')
#     if a is not None:
#         print(a.text.strip())
#     else:
#         print(li.text.strip() if li.text else '')
#



# print(tree.xpath('/html/body/div/ol/li'))
# print(tree.xpath('//ol/li/a'))
#
# print(tree.xpath('//ol/li/a/text()'))
# print(tree.xpath('//ol/li/a/@href'))
# print(tree.xpath('//*[@href]'))
# print(tree.xpath('//*[@href="hahaha"]'))
# print(tree.xpath('//*[@href="hahaha"]/text()'))
# Text nodes that are direct children of any <li> element.
print(tree.xpath('//li/text()'))
# Text of the last child element inside each <li>.
print(tree.xpath('//li/*[last()]/text()'))
# print(tree.xpath('//li/*[last()-1]/text()'))
# An empty predicate ("a[]") is not a valid XPath expression and makes
# lxml raise XPathEvalError, so use a positional predicate: the first <a>
# child of each <li> (XPath positions are 1-based).
print(tree.xpath('//li/a[1]'))


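# XPath path-step cheat sheet:
#   /    step to direct children (at the start of an expression: begin at the document root)
#   //   match descendants at any depth
#   ./   direct children of the current (context) node
#   .//  descendants of the current node at any depth
#   ../  step up to the parent of the current node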
